Introduction

In this script we are going to map myeloid cell subtypes onto the Visium slides.

Libraries

library(Seurat)
library(ggpubr)
library(cowplot)
library(dplyr)
library(ggplot2)
library(stringr)
library(readr)
library(SPOTlight)

Setting parameters

Loading necessary paths and parameters

set.seed(123)
source(here::here("misc/paths.R"))
source(here::here("utils/bin.R"))

"{myeloid}/{plt_dir}" %>%
  glue::glue() %>%
  here::here() %>%
  dir.create(path = ,
             showWarnings = FALSE,
             recursive = TRUE)

"{myeloid}/{robj_dir}" %>%
  glue::glue() %>%
  here::here() %>%
  dir.create(path = ,
             showWarnings = FALSE,
             recursive = TRUE)

myel_vec <- c(
  "IFN1+ PDC", "PDC", "aDC1", "aDC2", "aDC3", "C1Q HLA macrophages", "Cycling",
  "DC1 mature", "DC1 precursor", "DC2", "DC3", "DC4", "DC5", "IL7R DC", 
  "IL7R MMP12 macrophages", "ITGAX ZEB2 macrophages", "M1 Macrophages", 
  "Mast cells", "Monocytes", "Neutrophil Granulocytes",
  "SELENOP FUCA1 PTGDS macrophages")

Extract sample id and get Donor ID

# sample_id <- params$sample_id
sample_id <- "esvq52_nluss5"
donor_id <- id_sp_df[id_sp_df$gem_id == sample_id, ] %>% dplyr::pull(donor_id)

Load data

The spatial data comes from the script 03-clustering/03-clustering_integration.Rmd while the sc data can be found in Ramon’s scRNAseq analysis: /scratch/devel/rmassoni/tonsil_atlas_private/2-DOWNSTREAM_PROCESSING/results/R_objects/processed_seurat_objects/processed_seurat_objects/tonsil_integrated_with_harmony_scrublet_annotated.rds.

sp_obj <- "misc/{robj_dir}/20220215_tonsil_atlas_spatial_seurat_obj.rds" %>%
  glue::glue() %>%
  here::here() %>%
  readRDS(file = .)

# Single cell data
sc_obj <- "misc/{robj_dir}/20220215_tonsil_atlas_rna_seurat_obj.rds" %>%
  glue::glue() %>%
  here::here() %>%
  readRDS(file = .)

Analysis

Metadata

We are going to use SPOTlight to deconvolute the spatial spots and map the cell types.

We will start by adding the level5 definitive metadata labels to the full object.

sc_obj@meta.data <- sc_obj@meta.data %>%
  dplyr::mutate(
    # Annotation for SPOTlight
    annotation_deconv = dplyr::if_else(
      annotation_20220215 %in% myel_vec,
      as.character(annotation_20220215), as.character(annotation_level_1))
    )

# Remove preBC, preTC since we are not interested in them and they are minor pops
# Remove myeloid since all the "myeloid" cells are already in lvl5
sc_obj <- sc_obj[, ! sc_obj$annotation_deconv %in% c("preBC", "preTC", "myeloid")]

table(sc_obj@meta.data$annotation_deconv)
## 
##                            aDC1                            aDC2                            aDC3             C1Q HLA macrophages                           CD4_T                         Cycling                       Cytotoxic                      DC1 mature                   DC1 precursor                             DC2                             DC3                             DC4                             DC5                      epithelial                             FDC                            GCBC                       IFN1+ PDC                         IL7R DC          IL7R MMP12 macrophages          ITGAX ZEB2 macrophages                  M1 Macrophages                      Mast cells                       Monocytes                         NBC_MBC         Neutrophil Granulocytes                              PC                             PDC SELENOP FUCA1 PTGDS macrophages 
##                              81                              16                              83                             301                           58783                              70                            6009                              86                             104                             181                              20                              28                              95                             361                            1203                           72174                              50                              27                             166                             171                              72                              37                              93                          112478                             155                            9088                             432                             446
# level 5 Myeloid T Cells
exist <- myel_vec %in% unique(sc_obj$annotation_deconv)
names(exist) <- myel_vec
exist
##                       IFN1+ PDC                             PDC                            aDC1                            aDC2                            aDC3             C1Q HLA macrophages                         Cycling                      DC1 mature                   DC1 precursor                             DC2                             DC3                             DC4                             DC5                         IL7R DC          IL7R MMP12 macrophages          ITGAX ZEB2 macrophages                  M1 Macrophages                      Mast cells                       Monocytes         Neutrophil Granulocytes SELENOP FUCA1 PTGDS macrophages 
##                            TRUE                            TRUE                            TRUE                            TRUE                            TRUE                            TRUE                            TRUE                            TRUE                            TRUE                            TRUE                            TRUE                            TRUE                            TRUE                            TRUE                            TRUE                            TRUE                            TRUE                            TRUE                            TRUE                            TRUE                            TRUE
table(exist)
## exist
## TRUE 
##   21
lvl5_mye_obj <- sc_obj[, sc_obj$annotation_deconv %in% myel_vec]

Marker calculation

Since the normalization was carried out with the entire dataset we are going to do normalization again within this subset

sc_obj <- Seurat::NormalizeData(
  sc_obj,
  normalization.method = "LogNormalize",
  scale.factor = 10000,
  verbose = FALSE) %>%
  Seurat::FindVariableFeatures(., nfeatures = 3000, verbose = FALSE) %>% 
  Seurat::ScaleData(., verbose = FALSE)

1st we need to determine the marker genes for each high-level cluster

markers_lvl1_out <- "{myeloid}/{robj_dir}/markers_lvl1_myeloid.RDS" %>% 
  glue::glue() %>% 
  here::here()
# file.remove(markers_lvl1_out)
if (file.exists(markers_lvl1_out)) {
  
  markers_lvl1 <- readRDS(file = markers_lvl1_out)
  
} else {
  #### Extract marker genes from each cluster ####
  Seurat::Idents(object = sc_obj) <- sc_obj@meta.data[, "annotation_level_1"]
  markers_lvl1 <- Seurat::FindAllMarkers(
    object = sc_obj, 
    assay = "RNA",
    slot = "data",
    verbose = TRUE, 
    only.pos = TRUE,
    max.cells.per.ident = 200)
  
  saveRDS(object = markers_lvl1, file = markers_lvl1_out)
}
DT::datatable(markers_lvl1, filter = "top")

Now find the markers for Myeloid cells level 5 only between them

markers_lvl5_out <- "{myeloid}/{robj_dir}/markers_lvl5_myeloid.RDS" %>% 
  glue::glue() %>% 
  here::here()
# file.remove(markers_lvl5_out)

if (file.exists(markers_lvl5_out)) {
  
  markers_lvl5 <- readRDS(file = markers_lvl5_out)
  
} else {

  #### Extract marker genes from each cluster ####
  Seurat::Idents(object = lvl5_mye_obj) <- lvl5_mye_obj@meta.data[, "annotation_deconv"]
  
  markers_lvl5 <- Seurat::FindAllMarkers(
    object = lvl5_mye_obj,
    assay = "RNA",
    slot = "data",
    verbose = TRUE, 
    only.pos = TRUE,
    max.cells.per.ident = 200) 
  
  saveRDS(object = markers_lvl5, file = markers_lvl5_out)
}

DT::datatable(markers_lvl5, filter = "top")

Join markers from both runs. Since we still want to keep the CD4 T cells markers in the lower level markers we will add them to each one.

# Keep markers for major cell types
markers_lvl1_sub <- markers_lvl1 %>%
    dplyr::filter( ( avg_log2FC > 1 | pct.1 - pct.2 > 0.75) &
                   p_val_adj < 0.01 & pct.1 > 0.3 ) %>%
    dplyr::filter(cluster != "myeloid")

Add general myeloid cell markers to the specific cell types

markers_lvl5_update <- lapply(as.character(unique(markers_lvl5$cluster)),
  function(i) {
    tmp <- markers_lvl5 %>%
      dplyr::filter(cluster == i) %>%
      dplyr::mutate(cluster = i) %>%
      dplyr::filter(avg_log2FC > 0.75 | pct.1 - pct.2 > 0.3)
    # Only add myeloid to myeloid
    if (str_detect(string = i, pattern = "PDC", negate = TRUE)) {
        tmp <- tmp %>%
          dplyr::bind_rows(markers_lvl1 %>%
              filter(cluster == "myeloid" & avg_log2FC > 1.5))
    }
    tmp
    }) %>% bind_rows()

Combine level-1 and level-5 markers

all_markers <- dplyr::bind_rows(markers_lvl1_sub, markers_lvl5_update)

Subset SC object so all the cells from the same cell type to come from the same batch

# subset most representative sample(s)
ns <- with(sc_obj@meta.data, table(gem_id, annotation_deconv))
ns <- as.matrix(unclass(ns))
m <- 100 # Max cells per cell type
# Extract cell barcodes
meta <- sc_obj@meta.data

id <- lapply(colnames(ns), function(nm) {
    x <- ns[, nm]
    # Initialize variables
    n <- 0    # N of cells
    s <- c()  # Gem IDs
    b <- c()  # Cell barcodes
    while (n < m & length(x) > 0) {
        # select gem id with the most cells
        i <- which.max(x)
        # Add gem id to vector s
        s <- c(s, names(i))
        # Add number of cells per cell type to n
        n <- n + x[i]
        # Remove gem id from x to move on to the next
        x <- x[-i]
        # extract barcode
        barcode <- rownames(meta[meta[, "gem_id"] == names(i) &
                            meta[, "annotation_deconv"] == nm, ])
        # make sure it adds up to m
        # print(barcode)
        if (length(b) + length(barcode) > m) {
          barcode <- sample(x = barcode, size = m - length(b), replace = FALSE)
        }
        b <- c(b, barcode)  # Cell barcodes
        # print(b)

    }
    return(b)
})

sc_sub <- sc_obj[, unlist(id)]
table(sc_sub$annotation_deconv)
## 
##                            aDC1                            aDC2                            aDC3             C1Q HLA macrophages                           CD4_T                         Cycling                       Cytotoxic                      DC1 mature                   DC1 precursor                             DC2                             DC3                             DC4                             DC5                      epithelial                             FDC                            GCBC                       IFN1+ PDC                         IL7R DC          IL7R MMP12 macrophages          ITGAX ZEB2 macrophages                  M1 Macrophages                      Mast cells                       Monocytes                         NBC_MBC         Neutrophil Granulocytes                              PC                             PDC SELENOP FUCA1 PTGDS macrophages 
##                              81                              16                              83                             100                             100                              70                             100                              86                             100                             100                              20                              28                              95                             100                             100                             100                              50                              27                             100                             100                              72                              37                              93                             100                             100                             100                             100                             100

SPOTlight deconvolution

2nd run deconvolution

Save data

decon_fn <- "{myeloid}/{robj_dir}/spotlight_ls_myeloid.rds" %>%
  glue::glue() %>% 
  here::here()
# if (! file.exists(decon_fn) ) {
saveRDS(
  object = spotlight_ls,
  file = decon_fn)
# } else {
#   spotlight_ls <- readRDS(file = here::here(glue::glue("{myeloid}/{robj_dir}/spotlight_ls_myeloid.rds")))
# }

3rd Check Topic profiles

nmf_mod_ls <- spotlight_ls[[1]]
nmf_mod <- nmf_mod_ls[[1]]

h <- NMF::coef(nmf_mod)
rownames(h) <- paste("Topic", 1:nrow(h), sep = "_")
topic_profile_plts <- SPOTlight::dot_plot_profiles_fun(
  h = h,
  train_cell_clust = nmf_mod_ls[[2]])

topic_profile_plts[[2]] +
  ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 90), 
                 axis.text = ggplot2::element_text(size = 12))

Visualize deconvolution results

decon_mtrx <- spotlight_ls[[2]]
decon_mtrx <- decon_mtrx[, colnames(decon_mtrx) != "res_ss"]

# Set as 0 cell types predicted to be under 1 % of the spot
# decon_mtrx[decon_mtrx < 0.03] <- 0

Change column names

nm_df <- data.frame(
  mod_nm = stringr::str_replace_all(string = unique(all_markers$cluster),
                                    pattern = "[[:punct:]]|\\s|\\+" ,
                                    replacement = "."),
  plt_nm = unique(all_markers$cluster)
  )

# Save nm_df
"{myeloid}/{robj_dir}/myeloid_nm_df.rds" %>%
  glue::glue() %>% 
  here::here() %>%
  saveRDS(
    object = nm_df,
    file = .)


new_cn <- data.frame(mod_nm = colnames(decon_mtrx)) %>%
  dplyr::left_join(nm_df, by = "mod_nm") %>%
  # Central.Mem.PASK. fives some trouble because it only changes between + an -
  # negative goes first and distinct solves it automatically
  dplyr::distinct() %>%
  dplyr::pull(plt_nm)

colnames(decon_mtrx) <- new_cn

We are going to add the deconvolution to the Seurat object.

metadata <- cbind(sp_obj@meta.data, decon_mtrx)
sp_obj@meta.data <- metadata

Look at cells topic profile

basis_spotlight <- data.frame(NMF::basis(spotlight_ls[[1]][[1]]))

train_labs <- spotlight_ls[[1]][[2]]
colnames(basis_spotlight) <- unique(stringr::str_wrap(train_labs, width = 30))

basis_spotlight[basis_spotlight < 0.0000001] <- 0

DT::datatable(basis_spotlight, filter = "top")

Session Info

sessionInfo()
## R version 4.0.1 (2020-06-06)
## Platform: x86_64-conda_cos6-linux-gnu (64-bit)
## Running under: Red Hat Enterprise Linux Server release 6.7 (Santiago)
## 
## Matrix products: default
## BLAS/LAPACK: /scratch/groups/singlecell/software/anaconda3/envs/spatial_r/lib/libopenblasp-r0.3.12.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C               LC_TIME=es_ES.UTF-8        LC_COLLATE=en_US.UTF-8     LC_MONETARY=es_ES.UTF-8    LC_MESSAGES=en_US.UTF-8    LC_PAPER=es_ES.UTF-8       LC_NAME=C                  LC_ADDRESS=C               LC_TELEPHONE=C             LC_MEASUREMENT=es_ES.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] parallel  stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] tidyr_1.2.0         nnls_1.4            edgeR_3.32.1        limma_3.46.0        Matrix_1.4-0        NMF_0.23.0          Biobase_2.50.0      BiocGenerics_0.36.1 cluster_2.1.2       rngtools_1.5.2      pkgmaker_0.32.2     registry_0.5-1      tibble_3.1.6        purrr_0.3.4         SPOTlight_0.1.7     readr_2.1.2         stringr_1.4.0       dplyr_1.0.8         cowplot_1.1.1       ggpubr_0.4.0        ggplot2_3.3.5       SeuratObject_4.0.4  Seurat_4.1.0       
## 
## loaded via a namespace (and not attached):
##   [1] backports_1.4.1       plyr_1.8.6            igraph_1.2.11         lazyeval_0.2.2        splines_4.0.1         crosstalk_1.2.0       listenv_0.8.0         scattermore_0.8       gridBase_0.4-7        digest_0.6.29         foreach_1.5.2         htmltools_0.5.2       fansi_1.0.2           magrittr_2.0.2        doParallel_1.0.17     tensor_1.5            ROCR_1.0-11           tzdb_0.2.0            globals_0.14.0        matrixStats_0.61.0    vroom_1.5.7           spatstat.sparse_2.1-0 colorspace_2.0-3      ggrepel_0.9.1         xfun_0.30             crayon_1.5.0          jsonlite_1.8.0        spatstat.data_2.1-2   iterators_1.0.14      survival_3.3-1        zoo_1.8-9             glue_1.6.2            polyclip_1.10-0       gtable_0.3.0          leiden_0.3.9          car_3.0-12            future.apply_1.8.1    abind_1.4-5           scales_1.1.1          DBI_1.1.2             rstatix_0.7.0         spatstat.random_2.1-0 miniUI_0.1.1.1        Rcpp_1.0.8            viridisLite_0.4.0     xtable_1.8-4          reticulate_1.24       spatstat.core_2.4-0   bit_4.0.4             DT_0.21               htmlwidgets_1.5.4     httr_1.4.2            RColorBrewer_1.1-2    ellipsis_0.3.2       
##  [55] ica_1.0-2             farver_2.1.0          pkgconfig_2.0.3       sass_0.4.0            uwot_0.1.11           deldir_1.0-6          locfit_1.5-9.4        utf8_1.2.2            here_1.0.1            labeling_0.4.2        tidyselect_1.1.2      rlang_1.0.2           reshape2_1.4.4        later_1.3.0           munsell_0.5.0         tools_4.0.1           cli_3.2.0             generics_0.1.2        broom_0.7.12          ggridges_0.5.3        evaluate_0.15         fastmap_1.1.0         yaml_2.3.5            goftest_1.2-3         knitr_1.37            bit64_4.0.5           fitdistrplus_1.1-6    RANN_2.6.1            pbapply_1.5-0         future_1.24.0         nlme_3.1-155          mime_0.12             compiler_4.0.1        plotly_4.10.0         png_0.1-7             ggsignif_0.6.3        spatstat.utils_2.3-0  bslib_0.3.1           stringi_1.7.6         highr_0.9             lattice_0.20-45       vctrs_0.3.8           pillar_1.7.0          lifecycle_1.0.1       spatstat.geom_2.3-2   lmtest_0.9-39         jquerylib_0.1.4       RcppAnnoy_0.0.19      data.table_1.14.2     irlba_2.3.5           httpuv_1.6.5          patchwork_1.1.1       R6_2.5.1              promises_1.2.0.1     
## [109] KernSmooth_2.23-20    gridExtra_2.3         parallelly_1.30.0     codetools_0.2-18      MASS_7.3-55           assertthat_0.2.1      rprojroot_2.0.2       withr_2.5.0           sctransform_0.3.3     mgcv_1.8-39           hms_1.1.1             grid_4.0.1            rpart_4.1.16          rmarkdown_2.12        carData_3.0-5         Rtsne_0.15            shiny_1.7.1